# Climatic variables used as predictors
# =====================================
clim_var <- here("data/ClimaticData/NamesSelectedVariables.rds") %>%
  readRDS()

# Scaled climatic datasets
# ========================
# Past and future climates at the location of the populations are scaled
# with the parameters (mean and variance) of the past climatic data.
# NOTE(review): the scaling is delegated to the helper sourced below; its
# implementation is not visible here.
source(here("scripts/functions/generate_scaled_clim_datasets.R"))
clim_dfs <- generate_scaled_clim_datasets(clim_var, clim_ref_adj = FALSE)
Running GF models
Code
# Fit one gradient forest (GF) model per SNP set and store it in `$gf_mod`.
snp_sets <- lapply(snp_sets, function(snp_set) {
  # Warning! The SNP names must be sorted, otherwise the colors in the
  # species (allele) cumulative plots do not correspond to the right alleles.
  geno_sub <- geno %>%
    dplyr::select(all_of(sort(snp_set$set_snps)))

  # GF results vary from one run to another: fix the seed for reproducibility.
  set.seed(3)

  # The first column of `clim_ref` is dropped by position.
  # NOTE(review): presumably the population identifier -- confirm.
  snp_set$gf_mod <- gradientForest(
    data.frame(clim_dfs$clim_ref[, -1], geno_sub),
    predictor.vars = clim_var,
    response.vars = colnames(geno_sub),
    corr.threshold = 0.5,
    ntree = 500,
    trace = TRUE # spelled out: `T` is an ordinary variable and can be reassigned
  )

  snp_set
})

# Save the list of SNP sets together with their fitted models
snp_sets %>% saveRDS(here("outputs/GF/List_GF_models.rds"))
Number of alleles for which the climatic variables have some predictive power:
Code
# One row per SNP set: set name, number of SNPs, and the `species.pos.rsq`
# element of the fitted GF model (reported as the number of SNPs with
# predictive power).
tab <- snp_sets %>%
  lapply(function(snp_set) {
    tibble(
      "SNP sets" = snp_set$set_name,
      "Total number of SNPs" = length(snp_set$set_snps),
      "Number of SNPs with predictive power" = snp_set$gf_mod$species.pos.rsq
    )
  }) %>%
  bind_rows()

# Export the table, then display it in the document
write_csv(tab, here("tables/NbSNPsWithPredictivePowerGFmodels.csv"))
kable_mydf(tab)
We generate some plots to evaluate the GF models (stored in the files GFplots_[SNP set code].pdf):
Predictor overall importance plots. They show the mean accuracy importance and the mean importance weighted by SNPs \(\mathcal{R}^2\).
Splits density plots. They show the binned split importance and location on each gradient (spikes), the kernel density of splits (black lines), of observations (red lines) and of splits standardised by observations density (blue lines). Each distribution integrates to predictor importance. These show where important changes in the abundance of multiple alleles are occurring along the gradient; they indicate a composition change rate.
Species (in our case alleles) cumulative plots. They show, for each SNP, the cumulative importance distributions of splits improvement scaled by \(\mathcal{R}^2\) weighted importance, and standardised by density of observations. These show the cumulative change in the presence of individual alleles, where changes occur on the gradient, and the alleles changing most on each gradient.
Predictor cumulative plots. They show, for each predictor, the cumulative importance distributions of splits improvement scaled by \(\mathcal{R}^2\) weighted importance, and standardised by density of observations, averaged over all SNPs. These show cumulative change in overall allelic composition, and where changes occur on the gradient.
\(\mathcal{R}^2\) measure of the fit of the random forest model for each SNP.
The code to generate those plots comes from ‘Example analysis of biodiversity survey data with R package gradientForest’ by C. Roland Pitcher, Nick Ellis and Stephen J. Smith (pdf available here).
Code
# ==============================
# Functions to make the GF plots
# ==============================

# Splits density plot of a fitted gradientForest object `x`
# (all predictors, ordered as returned by `importance(x)`).
make_split_density_plot <- function(x) {
  plot(
    x,
    plot.type = "S",
    imp.vars = names(importance(x)),
    leg.posn = "topright",
    cex.legend = 1,
    cex.axis = 1,
    cex.lab = 1.2,
    line.ylab = 0.9,
    par.args = list(
      mgp = c(1.5, 0.5, 0),
      mar = c(3.1, 1.5, 0.1, 1),
      omi = c(0.1, 0.3, 0.1, 0.1)
    )
  )
}

# Predictor cumulative plot of a fitted gradientForest object `x`
# (overall curve only, common y-scale across predictors).
make_predictor_cumulative_plot <- function(x) {
  plot(
    x,
    plot.type = "C",
    imp.vars = names(importance(x)),
    show.species = FALSE,
    common.scale = TRUE,
    cex.axis = 1,
    cex.lab = 1.5,
    line.ylab = 1,
    par.args = list(
      mgp = c(1.5, 0.5, 0),
      mar = c(2.5, 1, 0.1, 0.5),
      omi = c(0, +0.3, 0, 0)
    )
  )
}

# Species (here: allele) cumulative plot of a fitted gradientForest object `x`
# (one curve per allele, legend limited to 10 alleles).
make_species_cumulative_plot <- function(x) {
  plot(
    x,
    plot.type = "C",
    imp.vars = names(importance(x)),
    show.overall = FALSE,
    legend = TRUE,
    leg.posn = "topleft",
    leg.nspecies = 10,
    cex.lab = 1,
    cex.legend = 1,
    cex.axis = 1,
    line.ylab = 1,
    par.args = list(
      mgp = c(1.5, 0.5, 0),
      mar = c(2.5, 1, 0.1, 0.5),
      omi = c(0, +0.3, 0, 0)
    )
  )
}

# R2 measure of the fit of the random forest model for each species (allele).
#
# x          : fitted gradientForest object.
# horizontal : if TRUE, R2 is drawn on the x-axis and the alleles on the y-axis.
#
# Called for its side effect (a base-graphics plot); returns NULL invisibly.
make_performance_plot <- function(x, horizontal = FALSE) {
  old.mar <- par()$mar
  # Restore the margins even if one of the plotting calls below fails.
  # As before, `mfrow` is left at c(1, 1) after the call.
  on.exit(par(mar = old.mar), add = TRUE)
  par(mfrow = c(1, 1), mar = old.mar)

  perf <- importance(x, type = "Species")
  idx <- seq_along(perf) # safe when `perf` is empty, unlike 1:n

  if (horizontal) {
    plot(perf, idx, las = 2, pch = 19, axes = FALSE, xlab = "", ylab = "")
  } else {
    plot(idx, perf, las = 2, pch = 19, axes = FALSE, xlab = "", ylab = "")
  }

  axis(
    labels = names(perf), side = 1 + horizontal, at = idx,
    cex.axis = 0.7, padj = 0, las = 2
  )
  axis(side = 2 - horizontal, cex.axis = 1)
  mtext(expression(R^2), side = 2 - horizontal, line = 2)
  title("Overall performance of random forests over loci")

  # Reference line at R2 = 0: it must follow the axis that carries R2.
  if (horizontal) {
    abline(v = 0, lty = 2)
  } else {
    abline(h = 0, lty = 2)
  }
  box()

  invisible(NULL)
}

# ===============================================
# Generate the GF plots for the Github repository
# ===============================================
# One multi-page PDF per SNP set.
lapply(snp_sets, function(snp_set) {
  pdf(
    here(paste0("figs/GF/GFplots_", snp_set$set_code, ".pdf")),
    width = 12, height = 8
  )

  # Overall importance plot
  plot(snp_set$gf_mod, plot.type = "Overall.Importance")

  # splits density plot
  # make_split_density_plot(x=snp_set$gf_mod)

  # species cumulative plot
  make_species_cumulative_plot(x = snp_set$gf_mod)

  # predictor cumulative plot
  make_predictor_cumulative_plot(x = snp_set$gf_mod)

  # R2 measure of the fit of the random forest model for each species
  make_performance_plot(x = snp_set$gf_mod)

  dev.off()
})

# ===================================================================
# Generate the GF plots for the Supplementary Information (if needed)
# ===================================================================
# Only the first and third SNP sets are exported, one PDF per plot type.
lapply(snp_sets[c(1, 3)], function(snp_set) {
  # Overall importance plot
  pdf(
    here(paste0("figs/GF/GFplots_OverallImportance_", snp_set$set_code, "_SI.pdf")),
    width = 8, height = 5
  )
  plot(snp_set$gf_mod, plot.type = "Overall.Importance")
  dev.off()

  # Splits density plot
  pdf(
    here(paste0("figs/GF/GFplots_SplitDensityPlot_", snp_set$set_code, "_SI.pdf")),
    width = 8, height = 5
  )
  make_split_density_plot(x = snp_set$gf_mod)
  dev.off()

  # Allele cumulative plot
  pdf(
    here(paste0("figs/GF/GFplots_AlleleCumulativePlot_", snp_set$set_code, "_SI.pdf")),
    width = 7, height = 7
  )
  make_species_cumulative_plot(x = snp_set$gf_mod)
  dev.off()

  # Predictor cumulative plot
  pdf(
    here(paste0("figs/GF/GFplots_PredictorCumulativePlot_", snp_set$set_code, "_SI.pdf")),
    width = 7, height = 7
  )
  make_predictor_cumulative_plot(x = snp_set$gf_mod)
  dev.off()

  # R2 measure of the fit of the random forest model for each species,
  # drawn with ggplot2 from the `result` element of the GF model.
  p <- tibble(
    snp = names(sort(snp_set$gf_mod$result)),
    importance = sort(snp_set$gf_mod$result)
  ) %>%
    ggplot() +
    geom_point(aes(y = reorder(snp, importance), x = importance)) +
    ylab("") +
    xlab(expression(R^2)) +
    theme_bw()

  ggsave(
    p,
    filename = here(paste0("figs/GF/GFPlots_AlleleImportance_", snp_set$set_code, "_SI.pdf")),
    width = 4, height = 10
  )
})
GO predictions
Code
# Genomic offset (GO) of each population under each future climate (GCM):
# distance between the GF-transformed reference climate and the
# GF-transformed future climate of the same population (same row).
snp_sets <- lapply(snp_sets, function(snp_set) {
  # Predictions for the data used to fit the model. They do not depend on the
  # GCM, so they are computed once per SNP set instead of once per GCM.
  ref_pred <- predict(snp_set$gf_mod)

  snp_set$go <- lapply(clim_dfs$clim_pred, function(clim_pred) {
    # Predictions under future climates
    fut_pred <- predict(snp_set$gf_mod, as.data.frame(clim_pred[, clim_var]))

    # Row-wise distance; `pdist` is kept so that the values are computed
    # exactly as before. NOTE(review): rows of `ref_pred` and `fut_pred`
    # are assumed to refer to the same populations in the same order.
    vapply(
      seq_len(nrow(ref_pred)),
      function(i) as.numeric(pdist(ref_pred[i, ], fut_pred[i, ])@dist),
      numeric(1)
    )
  })

  snp_set
})
Relationship with Euclidean distance
Code
source(here("scripts/functions/make_eucli_plot.R"))

# Euclidean climatic distance between the reference climate and each set of
# predicted climates (one vector per element of `clim_dfs$clim_pred`).
# NOTE(review): rows of both tables are assumed to be in the same order.
list_dist_env <- lapply(clim_dfs$clim_pred, function(clim_pred) {
  ref_clim <- dplyr::select(clim_dfs$clim_ref, any_of(clim_var))
  pred_clim <- dplyr::select(clim_pred, any_of(clim_var))
  sqrt(rowSums((ref_clim - pred_clim)^2))
})

# Main gene pools (for the figures), sorted by population
gps <- arrange(
  readRDS(here("data/GenomicData/MainGenePoolPopulations.rds")),
  pop
)
# Scatter plots of GO against the Euclidean climatic distance
# (Supplementary Information).
# ============================================================

# Axis limits shared by all panels
# ================================
# NOTE(review): the upper GO limit is computed from SNP sets 1 and 3 only,
# whereas the plots below are drawn for every SNP set -- confirm intended.
max_go <- max(unlist(lapply(snp_sets[c(1, 3)], function(z) unlist(z$go))))
range_eucli <- range(unlist(list_dist_env))

# One figure per SNP set
# ======================
# The layout is hard-coded for five panels (one per GCM) plus the legend
# in a 3-row grid.
lapply(snp_sets, function(set_i) {
  panels <- lapply(names(list_dist_env), function(gcm) {
    make_ggscatterplot(
      x = list_dist_env[[gcm]],
      y = set_i$go[[gcm]],
      title = gcm,
      range_eucli = range_eucli,
      max_go = max_go
    )
  })

  # Drop the y-labels of the panels in the second column
  for (k in c(2, 4)) {
    panels[[k]] <- panels[[k]] + ylab("")
  }

  # Drop the x-labels of panels 1 to 3
  for (k in 1:3) {
    panels[[k]] <- panels[[k]] + xlab("")
  }

  # The sixth cell of the grid holds the legend, taken from the first panel
  # before the legends are removed from the panels themselves.
  panels[[6]] <- get_legend(panels[[1]])
  for (k in 1:5) {
    panels[[k]] <- panels[[k]] + theme(legend.position = "none")
  }

  panel_grid <- plot_grid(plotlist = panels, nrow = 3)
  ggsave(
    here(paste0("figs/GF/ScatterPlotEucliDistance_", set_i$set_code, ".pdf")),
    panel_grid,
    width = 7, height = 8, device = "pdf"
  )
})
Comparing GO predictions
We look at the correlation across the different genomic offset predictions at the location of the populations, i.e. those based on all SNPs and those based on sets of candidates or control SNPs.
# Function to make the genomic offset maps
source(here("scripts/functions/make_go_map.R"))

# Population coordinates (first element of the reference-period list)
pop_coord <- readRDS(
  here("data/ClimaticData/MaritimePinePops/ClimatePopulationLocationPointEstimates_ReferencePeriods_noADJ.rds")
)[[1]]$ref_means %>%
  dplyr::select(pop, longitude, latitude)

# Find minimum and maximum values of genomic offset for the maps
# go_limits <- lapply(snp_sets, function(x) {
#   lapply(names(list_dist_env), function(gcm){
#     x$go[[gcm]]
#   }) %>% unlist()
# }) %>% unlist() %>% range()
# # The minimum GO value is very very small, almost zero, so we fix it to zero.
# go_limits[[1]] <- 0

# Generate the maps for each set of SNPs and each GCM
lapply(snp_sets, function(x) {
  # One map per GCM, each with its own upper GO limit.
  # NOTE(review): a single legend (taken from the first map) is then shared
  # by all panels although the limits differ between GCMs -- confirm intended.
  gcm_maps <- lapply(names(list_dist_env), function(gcm) {
    make_go_map(
      df = mutate(pop_coord, GO = x$go[[gcm]]),
      plot_title = gcm,
      go_limits = c(0, max(x$go[[gcm]])),
      legend_box_background = "white",
      point_size = 3
    )
  })

  shared_legend <- get_legend(gcm_maps[[1]])
  gcm_maps <- lapply(gcm_maps, function(m) m + theme(legend.position = "none"))
  gcm_maps$legend_maps <- shared_legend
  map_grid <- plot_grid(plotlist = gcm_maps)

  # Save the figures
  ggsave(
    here(paste0("figs/GF/GOMaps_PopLocations_", x$set_code, ".pdf")),
    map_grid,
    width = 10, height = 6, device = "pdf"
  )
  ggsave(
    here(paste0("figs/GF/GOMaps_PopLocations_", x$set_code, ".png")),
    map_grid,
    width = 10, height = 6
  )

  # Title strip with the name of the SNP set (only shown in the document,
  # the saved files have no title)
  title_strip <- ggdraw() +
    draw_label(x$set_name, fontface = "bold", x = 0, hjust = 0) +
    theme(plot.margin = margin(0, 0, 0, 7))

  # Merge title and maps
  plot_grid(title_strip, map_grid, ncol = 1, rel_heights = c(0.1, 1))
})
For each GCM, we assign the value 1 to the five populations with the highest genomic offset and the value 0 to the other populations. We then count the number of 1s for each population across GCMs, which gives the table and map below:
Code
source(here("scripts/functions/make_high_go_pop_maps.R"))

# Populations most often found among the highest-GO populations across GCMs,
# based on the GO predictions of the `common_cand` SNP set.
high_go_pops <- make_high_go_pop_maps(
  pop_coord = pop_coord,
  list_go = snp_sets$common_cand$go,
  ggtitle = "GF",
  nb_id_pop = 5 # number of selected populations
)
Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2 3.5.0.
ℹ Please use the `legend.position.inside` argument of `theme()` instead.
We project the genomic offset predictions for the set of candidate SNPs and the mean genomic offset across the five GCMs.
Code
# Extract the scaling parameters of each climatic variable, i.e. the mean and
# standard deviation of its values in the `ref_1901_1950` reference means.
# Result: a list named by climatic variable, each element being
# list(mean = ..., sd = ...).
scale_params <- local({
  # The file is read once, not once per climatic variable.
  ref_means <- readRDS(
    here("data/ClimaticData/MaritimePinePops/ClimatePopulationLocationPointEstimates_ReferencePeriods_noADJ.rds")
  )[["ref_1901_1950"]]$ref_means

  lapply(clim_var, function(x) {
    vec_var <- ref_means[, x] %>% pull()
    list(mean = mean(vec_var), sd = sd(vec_var))
  }) %>%
    setNames(clim_var)
})
Code
# Buffer used to mask the rasters (maritime pine distribution)
range_buffer <- shapefile(here("data/Mapping/PinpinDistriEUforgen_NFIplotsBuffer10km.shp"))

# The genomic offset is projected only for the candidate SNPs
snp_set <- snp_sets[["all_cand"]]

# Rasters with the climates of the reference period, one layer per climatic
# variable, masked with the range buffer
path <- here("data/ClimaticData/ClimateDTRasters/1km_1901-1950_Extent-JulietteA/")
ref_rasts <- clim_var %>%
  lapply(function(x) paste0(path, "/", x, ".tif")) %>%
  raster::stack() %>%
  mask(range_buffer)

# Climatic values as a data frame
clim_ref_df <- as.data.frame(rasterToPoints(ref_rasts))

# Scale the climatic variables with the initial scaling parameters
for (i in clim_var) {
  clim_ref_df[, i] <- (clim_ref_df[, i] - scale_params[[i]]$mean) /
    scale_params[[i]]$sd
}
Code
# GF predictions under the reference climate, across the species range
ref_pred <- predict(snp_set$gf_mod, clim_ref_df[, clim_var])

# Genomic offset across the species range for each GCM, then averaged.
# Result: one row per raster cell with columns x, y, one GO column per GCM
# and `mean_GO` (row mean of the GCM columns).
df <- lapply(names(clim_dfs$clim_pred), function(gcm) { # for each GCM

  # Rasters with future climates
  path <- here(paste0("data/ClimaticData/ClimateDTRasters/1km_", gcm, "_2041-2070_ssp370_Extent-JulietteA/"))

  clim_fut_df <- lapply(clim_var, function(x) paste0(path, "/", x, ".tif")) %>%
    raster::stack() %>%
    mask(range_buffer) %>%
    rasterToPoints() %>% # extract the climatic values at each spatial point
    as.data.frame()

  # Reference and future predictions are paired by row index below, and the
  # coordinates are taken from the reference table: both tables must describe
  # the same cells in the same order, otherwise GO would silently be computed
  # between different locations.
  same_cells <- nrow(clim_fut_df) == nrow(clim_ref_df) &&
    isTRUE(all.equal(clim_fut_df$x, clim_ref_df$x)) &&
    isTRUE(all.equal(clim_fut_df$y, clim_ref_df$y))
  if (!same_cells) {
    stop(
      "Future climate rasters for '", gcm,
      "' do not cover the same cells as the reference rasters.",
      call. = FALSE
    )
  }

  # Scale the climatic variables with the initial scaling parameters
  for (i in clim_var) {
    clim_fut_df[, i] <- (clim_fut_df[, i] - scale_params[[i]]$mean) /
      scale_params[[i]]$sd
  }

  # GF predictions under future climates
  fut_pred <- predict(snp_set$gf_mod, clim_fut_df[, clim_var])

  # Genomic offset: row-wise distance between reference and future
  # predictions (`pdist` kept so that values are computed exactly as before)
  go_values <- vapply(
    seq_len(nrow(ref_pred)),
    function(i) as.numeric(pdist(ref_pred[i, ], fut_pred[i, ])@dist),
    numeric(1)
  )

  clim_ref_df %>%
    dplyr::select(x, y) %>%
    as_tibble() %>%
    mutate(GO = go_values, gcm = gcm)
}) %>%
  bind_rows() %>%
  pivot_wider(values_from = GO, names_from = gcm) %>%
  mutate(mean_GO = rowMeans(dplyr::select(., -c(x:y))))

df %>% saveRDS(here("outputs/GF/go_pred_rasters.rds"))
MANUSCRIPT FIGURE: This figure corresponds to Figure 6b in the main manuscript.
Code
# Map options
# ===========
point_size <- 2
x_limits <- c(-10, 15)
y_limits <- c(31, 52)

# Country borders
world <- ne_countries(scale = "medium", returnclass = "sf")

# Load the mean GO projections
df <- readRDS(here("outputs/GF/go_pred_rasters.rds"))

# Map of the mean genomic offset across GCMs
p <- ggplot(data = df) +
  geom_sf(data = world, fill = "gray98") +
  scale_x_continuous(limits = x_limits) +
  scale_y_continuous(limits = y_limits) +
  geom_raster(aes(x = x, y = y, fill = mean_GO), alpha = 1) +
  scale_fill_gradient2(
    low = "blue", mid = "yellow", high = "red",
    # Centre of the colour scale = centre of the data range, (min + max) / 2.
    # The previous (max - min) / 2 is half the width of the range, which is
    # only the centre when the minimum is exactly 0.
    midpoint = mean(range(df$mean_GO)),
    limits = range(df$mean_GO),
    name = "Genomic offset from GF"
  ) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw(base_size = 11) +
  theme(
    panel.grid = element_blank(),
    plot.background = element_blank(),
    panel.background = element_blank(),
    legend.position = c(0.8, 0.15),
    legend.box.background = element_rect(colour = "gray80"),
    legend.title = element_text(size = 10),
    strip.text = element_text(size = 11)
  )

# Save the figure; the file name uses the code of the SNP set stored in
# `snp_set`, which must already exist in the session
p %>% ggsave(here(paste0("figs/GF/GOmeanProjections_", snp_set$set_code, ".pdf")), ., width = 6, height = 6, device = "pdf")
p %>% ggsave(here(paste0("figs/GF/GOmeanProjections_", snp_set$set_code, ".png")), ., width = 6, height = 6)

# Show the map in the document
p
Validation - NFI plots
Predicting GO in the NFI plots
WARNING! The NFI climatic datasets used for GO predictions have to be scaled with the scaling parameters used for GO estimation (i.e., the mean and variance of the climatic variables at the location of the studied populations under the reference climate).
Code
# Load the climatic data of the NFI plots.
nfi_clim <- readRDS(here("data/ClimaticData/NFIplots/NFIclimate.rds"))

# Keep only the climatic variables of interest and scale the climatic data
# Careful here! We have to scale the climatic variables of the reference period
# with the scaling parameters used for estimating GO!
source(here("scripts/functions/generate_scaled_nfi_clim_datasets.R"))
nfi_dfs <- generate_scaled_nfi_clim_datasets(
  clim_var,
  clim_ref = nfi_clim$clim_ref,
  clim_pred = nfi_clim$clim_survey
)
# generate_scaled_clim_datasets(clim_var, clim_ref = nfi_clim$clim_ref, clim_pred = nfi_clim$clim_survey)

# Calculate the genomic offset for the NFI plots: row-wise distance between
# the GF predictions under the reference-period climate and under the climate
# of the survey period. Stored in `$go_nfi` (one value per NFI plot).
snp_sets <- lapply(snp_sets, function(snp_set) {
  # Predictions under reference-period climates
  ref_pred <- predict(snp_set$gf_mod, as.data.frame(nfi_dfs$clim_ref[, clim_var]))
  # Predictions under climates during the survey period
  fut_pred <- predict(snp_set$gf_mod, as.data.frame(nfi_dfs$clim_pred[, clim_var]))

  snp_set$go_nfi <- vapply(
    seq_len(nrow(ref_pred)),
    function(i) as.numeric(pdist(ref_pred[i, ], fut_pred[i, ])@dist),
    numeric(1)
  )

  snp_set
})

# checking missing data
# lapply(snp_sets, function(x) sum(is.na(x$go_nfi)))

# Find minimum and maximum values of genomic offset for the maps
# go_limits <- lapply(snp_sets, function(snp_set) snp_set$go_nfi) %>% unlist() %>% range()
# # The minimum GO value is very very small, almost zero, so we fix it to zero.
# go_limits[[1]] <- 0

# Map genomic offset predictions in the NFI plots
lapply(snp_sets, function(x) {
  go_limits <- c(0, max(x$go_nfi))

  # Coordinates of the NFI plots (columns whose name contains "ude"), taken
  # from the first element of the object loaded above rather than re-reading
  # the same file from disk for every SNP set.
  df <- nfi_clim[[1]] %>%
    dplyr::select(contains("ude")) %>%
    mutate(GO = x$go_nfi)

  p <- make_go_map(
    df = df,
    point_size = 0.5,
    go_limits = go_limits,
    legend_position = c(0.85, 0.2),
    y_limits = c(35, 51),
    plot_title = x$set_name
  )

  ggsave(here(paste0("figs/GF/NFI_GOmap_", x$set_code, ".pdf")), p, width = 6, height = 6, device = "pdf")
  ggsave(here(paste0("figs/GF/NFI_GOmap_", x$set_code, ".png")), p, width = 6, height = 6)

  # If needed: same map without title
  p <- p + theme(plot.title = element_blank())
  ggsave(here(paste0("figs/GF/NFI_GOmap_", x$set_code, "_noTitle.pdf")), p, width = 6, height = 6, device = "pdf")

  # Show maps in the Quarto document (the version without title)
  # =============================================================
  p
})
We look at the correlation across the different genomic offset predictions in the NFI plots, i.e. those based on all SNPs and those based on sets of candidates or control SNPs.
How to explain high GO values in some of the NFI plots?
Interestingly, GO predictions for some of the NFI plots have considerably higher values than GO predictions at the location of the populations under future climates (Section 4) or GO predictions projected across the species distribution under future climates (Section 4.3.2).
We can visualize these differences with the graph below that shows the distribution of GO predictions projected across the species range under future climates (i.e., with the 5 GCMs) and GO predictions at the location of the NFI plots during the survey period.
Code
# Density of the GO predictions: one curve per GCM across the species range
# (future climates) and one curve for the NFI plots (inventory period).
readRDS(here("outputs/GF/go_pred_rasters.rds")) %>%
  # drop the coordinates and the mean GO across GCMs (not shown)
  dplyr::select(-x, -y, -mean_GO) %>%
  rename_with(~ paste0("Species range - Future climate with ", .x)) %>%
  pivot_longer(cols = everything(), names_to = "projection") %>%
  bind_rows(
    tibble(
      projection = "NFI plots - Climate during the inventory period",
      value = snp_sets$common_cand$go_nfi
    )
  ) %>%
  ggplot(aes(x = value, group = projection, fill = projection)) +
  geom_density(adjust = 1.5, alpha = .4) +
  theme_bw() +
  xlab("Genomic offset predictions") +
  ylab("Density") +
  theme(
    legend.position = c(0.7, 0.8),
    legend.title = element_blank()
  )
How can we explain these differences among the predictions?
Climatic differences
To understand, we first look at the climatic differences (1) at the location of the studied populations, (2) across the species range and (3) at the location of the NFI plots. We compare their reference climate, their climate for GO predictions (i.e., climate during the inventory period for the NFI plots and future climates for the populations and the species range), and their climatic differences between reference and inventory/future climate.
Code
##########
# DATASETS
##########

# We use the following datasets
# clim_dfs which contains reference and future climates at the location of the populations
# nfi_dfs which contains reference and survey climates at the location of the nfi plots
# clim_ref_df which contains reference climates across the species distribution

# We also extract future climates across the species distribution:
# a list with one scaled data frame per GCM, named by GCM.
clim_fut_rast <- lapply(setNames(nm = names(clim_dfs$clim_pred)), function(gcm) {
  # Folder holding the rasters with the future climates of this GCM
  path <- here(paste0("data/ClimaticData/ClimateDTRasters/1km_", gcm, "_2041-2070_ssp370_Extent-JulietteA/"))

  # Stack one layer per climatic variable, mask with the range buffer and
  # extract the climatic values at each spatial point
  tif_files <- lapply(clim_var, function(x) paste0(path, "/", x, ".tif"))
  clim_fut_df <- as.data.frame(
    rasterToPoints(mask(raster::stack(tif_files), range_buffer))
  )

  # Scale the climatic variables with the initial scaling parameters
  clim_fut_df[clim_var] <- Map(
    function(values, params) (values - params$mean) / params$sd,
    clim_fut_df[clim_var],
    scale_params[clim_var]
  )

  clim_fut_df
})
Reference climates
Code
#####################################
# Density plots of reference climates
#####################################
# One plot per climatic variable, comparing the (scaled) reference climate at
# the location of the populations, at the NFI plots and across the species
# distribution.
lapply(clim_var, function(x) {
  ## Legend title
  var_name <- extract_climatedt_metadata(var_clim = x) %>%
    mutate(var_legend = paste0(description, " (", label, "; ", unit_symbol, ")")) %>%
    pull(var_legend)

  tibble(group = "Populations", value = clim_dfs$clim_ref[[x]]) %>%
    bind_rows(tibble(group = "NFI plots", value = nfi_dfs$clim_ref[[x]])) %>%
    # Use the variable being plotted: this was hard-coded to `bio1`, so every
    # panel showed the bio1 distribution for the species range.
    bind_rows(tibble(group = "Species distribution", value = clim_ref_df[[x]])) %>%
    ggplot(aes(x = value, group = group, fill = group)) +
    xlab(var_name) +
    geom_density(adjust = 1.5, alpha = .4) +
    theme_bw() +
    ggtitle("Reference climates (1901-1950)") +
    theme(
      legend.position = c(0.2, 0.8),
      legend.title = element_blank()
    )
})
[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
Future climates
Code
# Density plots of the climates used for GO predictions: one plot per climatic
# variable, one facet per GCM. The NFI values (inventory climate) are the same
# in every facet.
lapply(clim_var, function(x) {
  ## Legend title
  var_name <- extract_climatedt_metadata(var_clim = x) %>%
    mutate(var_legend = paste0(description, " (", label, "; ", unit_symbol, ")")) %>%
    pull(var_legend)

  # Stack the three sources for each GCM
  values_by_gcm <- lapply(names(clim_dfs$clim_pred), function(gcm) {
    bind_rows(
      tibble(group = "Populations", value = clim_dfs$clim_pred[[gcm]][[x]]),
      tibble(group = "NFI plots", value = nfi_dfs$clim_pred[[x]]),
      tibble(group = "Species distribution", value = clim_fut_rast[[gcm]][[x]])
    ) %>%
      mutate(gcm = gcm)
  })

  bind_rows(values_by_gcm) %>%
    ggplot(aes(x = value, group = group, fill = group)) +
    xlab(var_name) +
    ylab("Density") +
    geom_density(adjust = 1.5, alpha = .4) +
    theme_bw() +
    facet_wrap(~gcm) +
    ggtitle("Future climates and inventory climates") +
    theme(
      legend.position = c(0.85, 0.2),
      legend.title = element_blank()
    )
})
[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
Climatic differences
We look at the climatic differences between reference climates and future/inventory climates.
These last graphs are useful to understand what is going on.
bio4 is the most important variable to explain the gene-climate relationships and thus to predict GO. For bio4, the climatic differences at the location of the NFI plots have higher minimum and maximum values than climatic differences at the location of the studied populations or across the species range (under future climates). More precisely, some NFI plots experienced strong deviations in bio4 during the survey period, which are larger than the mean deviations expected under climate change.
This trend may seem surprising at first glance. However, it is important to remember that predictions of future climates are 30-year averages (2041-2070), whereas climates for NFI plots are calculated over shorter inventory periods. Consequently, NFI plot climates are likely to be subject to greater deviations from long-term climate averages, particularly for plots with shorter inventory periods.
Mapping
I generated maps to visualize the climatic differences, but they do not help to visualize the higher climatic differences in some of the NFI plots for bio4.
Finally, we look at the relationship between the climatic differences in the NFI plots (climate during the inventory period - climate during the reference period) and GO predictions.
We can see that the high GO values in the NFI plots are associated with strong deviations of bio4 from its values under the reference period. I think this is the reason why we obtained higher maximum GO values in the NFI plots than across the species range under future climates.
Code
# For each climatic variable: GO predictions in the NFI plots against the
# difference between the (scaled) survey-period and reference climates,
# with a linear fit.
lapply(clim_var, function(x) {
  ## Legend title
  var_name <- extract_climatedt_metadata(var_clim = x) %>%
    mutate(var_legend = paste0(description, " (", label, "; ", unit_symbol, ")")) %>%
    pull(var_legend)

  nfi_diff <- tibble(
    group = "NFI plots",
    value_ref = nfi_dfs$clim_ref[[x]],
    value_pred = nfi_dfs$clim_pred[[x]],
    diff = value_pred - value_ref,
    go_pred = snp_sets$common_cand$go_nfi
  )

  ggplot(nfi_diff, aes(x = diff, y = go_pred)) +
    geom_point(alpha = 0.5) +
    xlab("Climate during the survey period - Reference climate") +
    ylab("Genomic offset predictions with GF and the common candidate SNPs") +
    ggtitle(var_name) +
    geom_smooth(method = lm, color = "red", fill = "#69b3a2", se = TRUE) +
    theme_bw()
})
[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
Validation - Common gardens
Code
# Climate and coordinates of the common gardens
cg_clim <- readRDS(here("data/ClimaticData/CommonGardens/ClimateCG.rds")) %>%
  dplyr::select(cg, any_of(clim_var))
cg_coord <- readRDS(here("data/ClimaticData/CommonGardens/ClimateCG.rds")) %>%
  dplyr::select(cg, contains("ude"))
cg_names <- unique(cg_coord$cg)

# Generate scaled climatic datasets with climatic data at the location of the
# populations and at the location of the common gardens
cg_dfs <- generate_scaled_clim_datasets(clim_var, clim_pred = cg_clim)

# Predict the genomic offset of each population when transplanted in the
# climate of each common garden. Stored in `$go_cg`: a tibble with one row per
# population (column `pop`) and one column per common garden.
snp_sets <- lapply(snp_sets, function(snp_set) {
  # Predictions under the reference climate of the populations
  ref_pred <- predict(snp_set$gf_mod, as.data.frame(cg_dfs$clim_ref[, clim_var]))
  # Predictions under the climate of the common gardens
  fut_pred <- predict(snp_set$gf_mod, as.data.frame(cg_dfs$clim_pred[, clim_var]))

  # For each population, distances to every common garden (one value per row
  # of `fut_pred`)
  snp_set$go_cg <- lapply(
    seq_len(nrow(ref_pred)),
    function(i) as.numeric(pdist(ref_pred[i, ], fut_pred)@dist)
  ) %>%
    setNames(cg_dfs$clim_ref[["pop"]]) %>%
    as.data.frame() %>%
    t() %>%
    as.data.frame() %>%
    set_colnames(cg_dfs$clim_pred[["cg"]]) %>%
    rownames_to_column(var = "pop") %>%
    as_tibble()

  snp_set
})

# Map genomic offset predictions at the locations of the populations:
# one grid of maps (one map per SNP set) for each common garden
go_maps_cg <- lapply(cg_names, function(cg_name) {
  p <- lapply(snp_sets, function(x) {
    df <- pop_coord %>%
      left_join(x$go_cg[, c("pop", cg_name)], by = "pop") %>%
      dplyr::rename(GO = all_of(cg_name))

    p <- make_go_map(
      df = df,
      point_size = 3,
      type = "CG",
      go_limits = c(0, max(df$GO)),
      cg_coord = filter(cg_coord, cg == cg_name),
      plot_title = paste0(str_to_title(cg_name), " - ", x$set_name),
      legend_position = c(0.8, 0.25)
    )

    # Pass the plot explicitly instead of relying on ggplot2's `last_plot()`
    ggsave(
      filename = here(paste0("figs/GF/GOmap_", x$set_code, "_", cg_name, ".pdf")),
      plot = p, device = "pdf", width = 5, height = 5
    )
    ggsave(
      filename = here(paste0("figs/GF/GOmap_", x$set_code, "_", cg_name, ".png")),
      plot = p, width = 5, height = 5
    )

    # p <- p + theme(plot.title = element_blank(), legend.position = "none")
    # ggsave(filename = here(paste0("figs/GF/GOmap_",x$set_code,"_",cg_name,"_noTitle.pdf")), device = "pdf",width=5,height=5)
    # ggsave(filename = here(paste0("figs/GF/GOmap_",x$set_code,"_",cg_name,"_noTitle.png")), width=5,height=5)

    p
  })

  plot_grid(plotlist = p, nrow = 2)
}) %>%
  setNames(cg_names)

# One page per common garden. The plots are printed explicitly so that the
# file is also filled when this code does not run at top level (e.g. inside
# a function or via `source()`), where nothing is auto-printed.
pdf(here("figs/GF/GOmaps_CGs.pdf"), width = 18, height = 11)
invisible(lapply(go_maps_cg, print))
dev.off()

# show maps
lapply(go_maps_cg, function(x) x)
We look at the correlation across the different genomic offset predictions in the common gardens, i.e. those based on all SNPs and those based on sets of candidates or control SNPs.
Fitzpatrick, Matthew C, Vikram E Chhatre, Raju Y Soolanayakanahally, and Stephen R Keller. 2021. “Experimental Support for Genomic Prediction of Climate Maladaptation Using the Machine Learning Approach Gradient Forests.”Molecular Ecology Resources. https://onlinelibrary.wiley.com/doi/abs/10.1111/1755-0998.13374.